# -*- coding: utf-8 -*-
import scrapy
from ipproxy.items import IpproxyItem


class KaiSpider(scrapy.Spider):
    """Crawl the free proxy listing pages on kuaidaili.com.

    Yields one ``IpproxyItem`` per proxy row found in the listing table.
    """

    name = 'kai'
    allowed_domains = ['www.kuaidaili.com']

    def __init__(self, page_count=3, *args, **kwargs):
        """Build ``start_urls`` for the first ``page_count`` listing pages.

        :param page_count: number of listing pages to crawl. Coerced with
            ``int()`` because spider arguments passed on the command line
            (``scrapy crawl kai -a page_count=5``) arrive as strings, which
            would make ``range(1, page_count + 1)`` raise ``TypeError``.
        """
        super(KaiSpider, self).__init__(*args, **kwargs)
        page_count = int(page_count)  # -a arguments are strings; be tolerant
        self.start_urls = [
            'http://www.kuaidaili.com/free/inha/{0}'.format(i)
            for i in range(1, page_count + 1)
        ]

    def parse(self, response):
        """Parse one listing page and yield an ``IpproxyItem`` per table row.

        The first ``<tr>`` is the table header and is skipped. Rows with
        fewer than 7 text cells (ads, footers, layout variants) are skipped
        instead of raising ``IndexError`` and aborting the whole callback.
        """
        for tr in response.xpath('//tr')[1:]:
            ipinfo = tr.xpath('td/text()').extract()
            if len(ipinfo) < 7:
                # Malformed or non-data row — skip rather than crash.
                continue
            row = IpproxyItem()
            row['ip'] = ipinfo[0]
            row['port'] = ipinfo[1]
            row['anonymous'] = ipinfo[2]
            row['proxy_type'] = ipinfo[3]
            row['country'] = ipinfo[4]
            # Speed cell looks like "1.23秒" — strip the unit, parse as float.
            row['speed'] = float(ipinfo[5].split("秒")[0])
            row['checked_time'] = ipinfo[6]
            row['proxy_name'] = self.name
            yield row
